\subsection{Definitions}
\begin{definition}
	Let \( E \) be a (nonempty) set. A collection \( \mathcal E \) of subsets of \( E \) is called a \emph{\( \sigma \)-algebra} if the following properties hold:
	\begin{itemize}
		\item \( \varnothing \in \mathcal E \);
		\item \( A \in \mathcal E \implies A^c = E \setminus A \in \mathcal E \);
		\item if \( (A_n)_{n \in \mathbb N} \) is a countable collection of sets in \( \mathcal E \), \( \bigcup_{n \in \mathbb N} A_n \in \mathcal E \).
	\end{itemize}
\end{definition}
\begin{example}
	Let \( \mathcal E = \qty{\varnothing, E} \).
	This is a \( \sigma \)-algebra.
	Also, \( \mathcal P(E) = \qty{A \subseteq E} \) is a \( \sigma \)-algebra.
\end{example}
\begin{remark}
	Since \( \bigcap_n A_n = \qty(\bigcup_n A_n^c)^c \), any \( \sigma \)-algebra \( \mathcal E \) is closed under countable intersections as well as under countable unions.
	Note that \( B \setminus A = B \cap A^c \in \mathcal E \), so \( \sigma \)-algebras are closed under set difference.
\end{remark}
\begin{definition}
	A set \( E \) with a \( \sigma \)-algebra \( \mathcal E \) is called a \emph{measurable space}.
	The elements of \( \mathcal E \) are called \emph{measurable sets}.
\end{definition}
\begin{definition}
	A \emph{measure} \( \mu \) is a set function \( \mu : \mathcal E \to [0,\infty] \), such that \( \mu(\varnothing) = 0 \), and for a sequence \( (A_n)_{n \in \mathbb N} \) such that the \( A_n \) are disjoint, we have
	\[ \mu\qty(\bigcup_{n \in \mathbb N} A_n) = \sum_{n \in \mathbb N} \mu(A_n) \]
	This is the \emph{countable additivity} property of the measure.
\end{definition}
\begin{remark}
	If \( E \) is countable, then for any \( A \in \mathcal P(E) \) and measure \( \mu \), we have
	\[ \mu(A) = \mu\qty(\bigcup_{x\in A} \qty{x}) = \sum_{x \in A} \mu(\qty{x}) \]
	Hence, measures are uniquely defined by the measure of each singleton.
	This corresponds to the notion of a probability mass function.
\end{remark}
\begin{definition}
	For a collection \( \mathcal A \) of subsets of \( E \), we define the \( \sigma \)-algebra \emph{\( \sigma(\mathcal A) \) generated by \( \mathcal A \)} by
	\[ \sigma(\mathcal A) = \qty{A \subseteq E \colon A \in \mathcal E \text{ for all \( \sigma \)-algebras } \mathcal E \supseteq \mathcal A} \]
	So it is the smallest \( \sigma \)-algebra containing \( \mathcal A \).
	Equivalently,
	\[ \sigma(\mathcal A) = \bigcap_{\mathcal E \supseteq \mathcal A, \mathcal E \text{ a \( \sigma \)-algebra}} \mathcal E \]
\end{definition}

\subsection{Rings and algebras}
To construct good generators, we define the following.
\begin{definition}
	\( \mathcal A \subseteq \mathcal P(E) \) is called a \emph{ring} over \( E \) if \( \varnothing \in \mathcal A \) and \( A, B \in \mathcal A \) implies \( B \setminus A \in \mathcal A \) and \( A \cup B \in \mathcal A \).
\end{definition}
Rings are easier to manage than \( \sigma \)-algebras because there are only finitary operators.
\begin{definition}
	\( \mathcal A \) is called an \emph{algebra} over \( E \) if \( \varnothing \in \mathcal A \) and \( A, B \in \mathcal A \) implies \( A^c \in \mathcal A \) and \( A \cup B \in \mathcal A \).
\end{definition}
\begin{remark}
	Rings are closed under symmetric difference \( A \symmdiff B = (B \setminus A) \cup (A \setminus B) \), and are closed under intersections \( A \cap B = (A \cup B) \setminus (A \symmdiff B) \).
	Algebras are rings, because \( B \setminus A = B \cap A^c = (B^c \cup A)^c \).
	Not all rings are algebras, because rings do not need to include the entire space.
\end{remark}
\begin{proposition}[Disjointification of countable unions]
	Consider \( \bigcup_n A_n \) for \( A_n \in \mathcal E \), where \( \mathcal E \) is a \( \sigma \)-algebra (or a ring, if the union is finite).
	Then there exist \( B_n \in \mathcal E \) that are disjoint such that \( \bigcup_n A_n = \bigcup_n B_n \).
\end{proposition}
\begin{proof}
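	Define \( \widetilde A_n = \bigcup_{j \leq n} A_j \) with \( \widetilde A_0 = \varnothing \), then set \( B_n = \widetilde A_n \setminus \widetilde A_{n-1} \) for \( n \geq 1 \).
	The \( B_n \) are disjoint elements of \( \mathcal E \) with \( \bigcup_{j \leq n} B_j = \widetilde A_n \) for all \( n \).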
\end{proof}
\begin{definition}
	A \emph{set function} on a collection \( \mathcal A \) of subsets of \( E \), where \( \varnothing \in \mathcal A \), is a map \( \mu \colon \mathcal A \to [0,\infty] \) such that \( \mu(\varnothing) = 0 \).
	We say \( \mu \) is \emph{increasing} if \( \mu(A) \leq \mu(B) \) for all \( A \subseteq B \) in \( \mathcal A \).
	We say \( \mu \) is \emph{additive} if \( \mu(A \cup B) = \mu(A) + \mu(B) \) for disjoint \( A, B \in \mathcal A \) and \( A \cup B \in \mathcal A \).
	We say \( \mu \) is \emph{countably additive} if \( \mu\qty(\bigcup_n A_n) = \sum_n \mu(A_n) \) for disjoint sequences \( A_n \) where \( \bigcup_n A_n \) and each \( A_n \) lie in \( \mathcal A \).
	We say \( \mu \) is \emph{countably subadditive} if \( \mu\qty(\bigcup_n A_n) \leq \sum_n \mu(A_n) \) for arbitrary sequences \( A_n \) under the above conditions.
\end{definition}
\begin{remark}
	A measure satisfies all four of the above conditions. Countable additivity implies the other conditions.
\end{remark}
\begin{theorem}[Carath\'eodory's theorem]
	Let \( \mu \) be a countably additive set function on a ring \( \mathcal A \) of subsets of \( E \).
	Then there exists a measure \( \mu^\star \) on \( \sigma(\mathcal A) \) such that \( \eval{\mu^\star}_{\mathcal A} = \mu \).
\end{theorem}
We will later prove that this extended measure is unique.
\begin{proof}
	For \( B \subseteq E \), we define the \emph{outer measure} \( \mu^\star \) as
	\[ \mu^\star(B) = \inf \qty{\sum_{n \in \mathbb N} \mu(A_n), A_n \in \mathcal A, B \subseteq \bigcup_{n \in \mathbb N} A_n} \]
	If there is no sequence \( A_n \) such that \( B \subseteq \bigcup_{n \in \mathbb N} A_n \), we declare the outer measure \( \mu^\star(B) \) to be \( \infty \).
	We define the class
	\[ \mathcal M = \qty{A \subseteq E \mid \forall B \subseteq E,\mu^\star(B) = \mu^\star(B \cap A) + \mu^\star(B \cap A^c)} \]
	This is the class of \emph{\( \mu^\star \)-measurable sets}.

	\emph{Step 1.} \( \mu^\star \) is countably sub-additive on \( \mathcal P(E) \).
	It suffices to prove that for \( B \subseteq E \) and \( B_n \subseteq E \) such that \( B \subseteq \bigcup_n B_n \) we have
	\begin{equation}
		\mu^\star(B) \leq \sum_n \mu^\star(B_n)
		\tag{\(\dagger\)}
	\end{equation}
	We can assume without loss of generality that \( \mu^\star(B_n) < \infty \) for all \( n \), otherwise there is nothing to prove.
	For all \( \varepsilon > 0 \) there exists a collection \( A_{n,m} \) such that \( B_n \subseteq \bigcup_m A_{n,m} \) and
	\[ \mu^\star(B_n) + \frac{\varepsilon}{2^n} \geq \sum_m \mu(A_{n,m}) \]
	Now, since \( \mu^\star \) is increasing, and \( B \subseteq \bigcup_n B_n \subseteq \bigcup_n \bigcup_m A_{n,m} \), we have
	\[ \mu^\star(B) \leq \mu^\star\qty(\bigcup_{n,m} A_{n,m}) \leq \sum_{n,m} \mu(A_{n,m}) \leq \sum_n \mu^\star(B_n) + \sum_n \frac{\varepsilon}{2^n} = \sum_n \mu^\star(B_n) + \varepsilon \]
	Since \( \varepsilon > 0 \) was arbitrary, \( (\dagger) \) follows.

	\emph{Step 2.} \( \mu^\star \) extends \( \mu \).
	Let \( A \in \mathcal A \), and we want to show \( \mu^\star(A) = \mu(A) \).
	We can write \( A = A \cup \varnothing \cup \dots \), hence \( \mu^\star(A) \leq \mu(A) + 0 + \dots = \mu(A) \) by definition of \( \mu^\star \).
	We need to prove the converse, that \( \mu(A) \leq \mu^\star(A) \).
	If \( \mu^\star(A) \) is infinite, there is nothing to prove.
	For the finite case, take any sequence \( A_n \in \mathcal A \) with \( \mu(A_n) < \infty \) and \( A \subseteq \bigcup_n A_n \).
	Then, \( A = \bigcup_n (A \cap A_n) \), which is a union of elements of the ring \( \mathcal A \).
	Since \( \mu \) is a countably additive set function on \( \mathcal A \), it is countably subadditive.
	Hence \( \mu(A) \leq \sum_n \mu(A \cap A_n) \leq \sum_n \mu(A_n) \).
	Since the \( A_n \) were arbitrary, we have \( \mu(A) \leq \mu^\star(A) \) as required.

	\emph{Step 3.} \( \mathcal M \supseteq \mathcal A \).
	Let \( A \in \mathcal A \).
	We must show that for all \( B \subseteq E \), \( \mu^\star(B) = \mu^\star(B \cap A) + \mu^\star(B \cap A^c) \).
	We have \( B \subseteq (B \cap A) \cup (B \cap A^c) \cup \varnothing \cup \dots \), hence by countable subadditivity \( (\dagger) \), \( \mu^\star(B) \leq \mu^\star(B \cap A) + \mu^\star(B \cap A^c) \).
	It now suffices to prove the converse, that \( \mu^\star(B) \geq \mu^\star(B \cap A) + \mu^\star(B \cap A^c) \).
	We can assume \( \mu^\star(B) \) is finite, so for every \( \varepsilon > 0 \) there exist \( A_n \in \mathcal A \) such that \( B \subseteq \bigcup_n A_n \) and \( \mu^\star(B) + \varepsilon \geq \sum_n \mu(A_n) \).
	Now, \( B \cap A \subseteq \bigcup_n (A_n \cap A) \), and \( B \cap A^c \subseteq \bigcup_n (A_n \cap A^c) \).
	All of the members of these two unions are elements of \( \mathcal A \), since \( A_n \cap A^c = A_n \setminus A \).
	Therefore,
	\begin{align*}
		\mu^\star(B \cap A) + \mu^\star(B \cap A^c) &\leq \sum_n \mu(A_n \cap A) + \sum_n \mu(A_n \cap A^c) \\
		&\leq \sum_n \qty[ \mu(A_n \cap A) + \mu(A_n \cap A^c) ] \\
		&\leq \sum_n \mu(A_n) \leq \mu^\star(B) + \varepsilon
	\end{align*}
	Since \( \varepsilon \) was arbitrary, \( \mu^\star(B) = \mu^\star(B \cap A) + \mu^\star(B \cap A^c) \) as required.

	\emph{Step 4.} \( \mathcal M \) is an algebra.
	Clearly \( \varnothing \) lies in \( \mathcal M \), and by the symmetry in the definition of \( \mathcal M \), complements lie in \( \mathcal M \).
	We need to check \( \mathcal M \) is stable under finite intersections.
	Let \( A_1, A_2 \in \mathcal M \) and let \( B \subseteq E \).
	We have
	\[ \mu^\star(B) = \mu^\star(B \cap A_1) + \mu^\star(B \cap A_1^c) = \mu^\star(B \cap A_1 \cap A_2) + \mu^\star(B \cap A_1 \cap A_2^c) + \mu^\star(B \cap A_1^c) \]
	We can write \( A_1 \cap A_2^c = (A_1 \cap A_2)^c \cap A_1 \), and \( A_1^c = (A_1 \cap A_2)^c \cap A_1^c \).
	Hence
	\begin{align*}
		\mu^\star(B) &= \mu^\star(B \cap A_1 \cap A_2) + \mu^\star(B \cap (A_1 \cap A_2)^c \cap A_1) + \mu^\star(B \cap (A_1 \cap A_2)^c \cap A_1^c) \\
		&= \mu^\star(B \cap A_1 \cap A_2) + \mu^\star(B \cap (A_1 \cap A_2)^c)
	\end{align*}
	which is the requirement for \( A_1 \cap A_2 \) to lie in \( \mathcal M \).

	\emph{Step 5.} \( \mathcal M \) is a \( \sigma \)-algebra and \( \mu^\star \) is a measure on \( \mathcal M \).
	It suffices now to show that \( \mathcal M \) has countable unions and the measure respects these countable unions.
	Let \( A = \bigcup_n A_n \) for \( A_n \in \mathcal M \).
	Without loss of generality, let the \( A_n \) be disjoint.
	We want to show \( A \in \mathcal M \), and that \( \mu^\star(A) = \sum_n \mu^\star(A_n) \).
	By \( (\dagger) \), we have \( \mu^\star(B) \leq \mu^\star(B \cap A) + \mu^\star(B \cap A^c) + 0 + \dots \) so we need to check only the converse of this inequality.
	Also, \( \mu^\star(A) \leq \sum_n \mu^\star(A_n) \), so we need only check the converse of this inequality as well.
	Similarly to before,
	\begin{align*}
		\mu^\star(B) &= \mu^\star(B \cap A_1) + \mu^\star(B \cap A_1^c) \\
		&= \mu^\star(B \cap A_1) + \mu^\star(B \cap A_1^c \cap A_2) + \mu^\star(B \cap A_1^c \cap A_2^c) \\
		&= \mu^\star(B \cap A_1) + \mu^\star(B \cap A_2) + \mu^\star(B \cap A_1^c \cap A_2^c) \\
		&= \mu^\star(B \cap A_1) + \mu^\star(B \cap A_2) + \mu^\star(B \cap A_1^c \cap A_2^c \cap A_3) + \mu^\star(B \cap A_1^c \cap A_2^c \cap A_3^c) \\
		&= \mu^\star(B \cap A_1) + \mu^\star(B \cap A_2) + \mu^\star(B \cap A_3) + \mu^\star(B \cap A_1^c \cap A_2^c \cap A_3^c) \\
		&= \cdots \\
		&= \sum_{n \leq N} \mu^\star(B \cap A_n) + \mu^\star(B \cap A_1^c \cap \dots \cap A_N^c)
	\end{align*}
	Since \( \bigcup_{n \leq N} A_n \subseteq A \), we have \( \bigcap_{n \leq N} A_n^c \supseteq A^c \).
	\( \mu^\star \) is increasing, hence, taking limits,
	\[ \mu^\star(B) \geq \sum_{n=1}^\infty \mu^\star(B \cap A_n) + \mu^\star(B \cap A^c) \]
	By \( (\dagger) \),
	\[ \mu^\star(B) \geq \mu^\star(B \cap A) + \mu^\star(B \cap A^c) \]
	as required.
	Hence \( \mathcal M \) is a \( \sigma \)-algebra.
	For the other inequality, we take the above result for \( B = A \).
	\[ \mu^\star(A) \geq \sum_{n=1}^\infty \mu^\star(A \cap A_n) + \mu^\star(A \cap A^c) = \sum_{n=1}^\infty \mu^\star(A_n) \]
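	So \( \mu^\star \) is countably additive on \( \mathcal M \) and is hence a measure on \( \mathcal M \).
	Finally, \( \mathcal M \) is a \( \sigma \)-algebra containing \( \mathcal A \) by step 3, so \( \mathcal M \supseteq \sigma(\mathcal A) \), and by step 2 the restriction of \( \mu^\star \) to \( \sigma(\mathcal A) \) is a measure extending \( \mu \).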
\end{proof}

\subsection{Uniqueness of extension}
\begin{definition}
	A collection \( \mathcal A \) of subsets of \( E \) is called a \emph{\( \pi \)-system} if \( \varnothing \in \mathcal A \) and \( A, B \in \mathcal A \implies A \cap B \in \mathcal A \).
\end{definition}
\begin{definition}
	A collection \( \mathcal A \) of subsets of \( E \) is called a \emph{\( d \)-system} if \( E \in \mathcal A \), and if \( B_1 \subset B_2 \) are elements of \( \mathcal A \), we have \( B_2 \setminus B_1 \in \mathcal A \), and if \( A_n \in \mathcal A \) and \( A_n \) is an increasing sequence of sets, we have \( \bigcup_n A_n \in \mathcal A \).
\end{definition}
\begin{proposition}
	A \( d \)-system which is also a \( \pi \)-system is a \( \sigma \)-algebra.
\end{proposition}
\begin{proof}
	Refer to the first example sheet.
\end{proof}
\begin{lemma}[Dynkin]
	Let \( \mathcal A \) be a \( \pi \)-system.
	Then any \( d \)-system that contains \( \mathcal A \) also contains \( \sigma(\mathcal A) \).
\end{lemma}
\begin{proof}
	We define
	\[ \mathcal D = \bigcap_{\mathcal D' \text{ is a } d \text{-system};\; \mathcal D' \supseteq \mathcal A} \mathcal D' \]
	We can show this is a \( d \)-system.
	It suffices to prove that \( \mathcal D \) is a \( \pi \)-system, because this is then a \( \sigma \)-algebra.
	We now define
	\[ \mathcal D' = \qty{B \in \mathcal D \mid \forall A \in \mathcal A, B \cap A \in \mathcal D} \]
	We can see that \( \mathcal D' \supseteq \mathcal A \), as \( \mathcal A \) is a \( \pi \)-system.
	We now show that \( \mathcal D' \) is a \( d \)-system.
	Clearly \( E \in \mathcal D \), and \( E \cap A = A \in \mathcal A \subseteq \mathcal D \) for all \( A \in \mathcal A \), hence \( E \in \mathcal D' \).
	Let \( B_1, B_2 \in \mathcal D' \) such that \( B_1 \subseteq B_2 \).
	Then \( (B_2 \setminus B_1) \cap A = (B_2 \cap A) \setminus (B_1 \cap A) \), and since \( B_i \cap A \in \mathcal D \) this difference also lies in \( \mathcal D \), so \( B_2 \setminus B_1 \in \mathcal D' \).
	Now, suppose \( B_n \) is an increasing sequence converging to \( B \), and \( B_n \in \mathcal D' \).
	Then \( B_n \cap A \in \mathcal D \) is an increasing sequence with union \( B \cap A \), and since \( \mathcal D \) is a \( d \)-system, we have \( B \in \mathcal D \) and \( B \cap A \in \mathcal D \), so \( B \in \mathcal D' \).

	Hence \( \mathcal D' \) is a \( d \)-system that contains \( \mathcal A \), so \( \mathcal D \subseteq \mathcal D' \), and \( \mathcal D' \subseteq \mathcal D \) by construction of \( \mathcal D' \), giving \( \mathcal D = \mathcal D' \).
	We then define
	\[ \mathcal D'' = \qty{B \in \mathcal D \mid \forall A \in \mathcal D, B \cap A \in \mathcal D} \]
	Note that \( \mathcal A \subseteq \mathcal D'' \), because \( \mathcal D' = \mathcal D \supseteq \mathcal A \).
	Running the same argument as before, we can show that \( \mathcal D'' = \mathcal D \), and so \( \mathcal D'' = \mathcal D \) is a \( \pi \)-system.
\end{proof}
\begin{theorem}[Uniqueness of extension]
	Let \( \mu_1, \mu_2 \) be measures on a measurable space \( (E, \mathcal E) \), such that \( \mu_1(E) = \mu_2(E) < \infty \).
	Suppose that \( \mu_1 \) and \( \mu_2 \) coincide on a \( \pi \)-system \( \mathcal A \), such that \( \mathcal E \subseteq \sigma(\mathcal A) \).
	Then \( \mu_1 = \mu_2 \) on \( \sigma(\mathcal A) \), and hence on \( \mathcal E \).
\end{theorem}
\begin{proof}
	We define
	\[ \mathcal D = \qty{A \in \mathcal E \mid \mu_1(A) = \mu_2(A)} \]
	This collection contains \( \mathcal A \) by assumption.
	By Dynkin's lemma, it suffices to prove \( \mathcal D \) is a \( d \)-system, because then \( \mathcal D \supseteq \sigma(\mathcal A) \supseteq \mathcal E \) giving \( \mathcal D = \mathcal E \).
	Note that \( E \in \mathcal D \) by assumption.
	By additivity and finiteness of \( \mu_i \), for \( B_1 \subseteq B_2 \) elements of \( \mathcal D \), we have \( \mu_1(B_2 \setminus B_1) = \mu_1(B_2) - \mu_1(B_1) = \mu_2(B_2) - \mu_2(B_1) = \mu_2(B_2 \setminus B_1) \), where the subtractions are valid by finiteness of \( \mu_i \), so set differences lie in \( \mathcal D \).

	Now suppose \( B_n \) is an increasing sequence converging to \( B \) for \( B_n \in \mathcal D \).
	This implies that \( B \setminus B_n \) is a decreasing sequence converging to \( \varnothing \), and by a result from the first example sheet we have \( \mu_i(B \setminus B_n) \to \mu_i(\varnothing) = 0 \).
	Since \( \mu_i \) are finite, \( \mu_i(B_n) \to \mu_i(B) \) as \( n \to \infty \).
	Then, \( \mu_1(B) = \lim_{n \to \infty} \mu_1(B_n) = \lim_{n \to \infty} \mu_2(B_n) = \mu_2(B) \), so \( \mathcal D \) is closed under increasing sequences and hence is a \( d \)-system.
\end{proof}
\begin{remark}
	The above theorem applies to finite measures (\( \mu \) such that \( \mu(E) < \infty \)) only.
	However, the theorem can be extended to measures that are \( \sigma \)-finite, for which \( E = \bigcup_{n \in \mathbb N} E_n \) where \( \mu(E_n) < \infty \).
\end{remark}

\subsection{Borel measures}
\begin{definition}
	Let \( (E, \tau) \) be a Hausdorff topological space.
	The \( \sigma \)-algebra generated by the open sets of \( E \) is called the \emph{Borel \( \sigma \)-algebra} on \( E \), denoted \( \mathcal B(E) = \sigma(\tau) \).
	We write \( \mathcal B = \mathcal B(\mathbb R) \).
	Members of \( \mathcal B(E) \) are called \emph{Borel sets}.
	A measure \( \mu \) on \( (E, \mathcal B(E)) \) is called a \emph{Borel measure on \( E \)}.
	A \emph{Radon measure} is a Borel measure \( \mu \) on \( E \) such that \( \mu(K) < \infty \) for all \( K \subseteq E \) compact.
	Note that in a Hausdorff space, compact sets are closed and hence measurable.
\end{definition}

\subsection{Lebesgue measure}
We will construct a unique Borel measure \( \mu \) on \( \mathbb R^d \) such that
\[ \mu\qty(\prod_{i=1}^d [a_i, b_i]) = \prod_{i=1}^d \abs{b_i - a_i} \]
Initially, we will perform this construction for \( d = 1 \), and later we will consider product measures to extend this to higher dimensions.
\begin{theorem}[Construction of the Lebesgue measure]
	There exists a unique Borel measure \( \mu \) on \( \mathbb R \) such that
	\[ a < b \implies \mu((a,b]) = b - a \]
\end{theorem}
\begin{proof}
	Consider the subsets of \( \mathbb R \) of the form
	\[ A = (a_1,b_1] \cup \dots \cup (a_n,b_n] \]
	where the intervals in question are disjoint.
	The set \( \mathcal A \) of such sets forms a ring and a \( \pi \)-system of Borel sets.
	This generates the same \( \sigma \)-algebra as that generated by finite unions of open intervals, by the first example sheet.
	Open intervals with rational endpoints generate \( \mathcal B \), so \( \sigma(\mathcal A) \supseteq \mathcal B \).
	% TODO: Check countability?
	We define the set function \( \mu \) on \( \mathcal A \) by \( \mu(A) = \sum_{i=1}^n (b_i - a_i) \).
	\( \mu \) is additive, and well-defined since if \( A = \bigcup_j C_j = \bigcup_k D_k \) for distinct disjoint unions, we can write \( C_j = \bigcup_k (C_j \cap D_k) \) and \( D_k = \bigcup_j (D_k \cap C_j) \), giving
	\[ \mu(A) = \mu\qty(\bigcup_j C_j) = \sum_j \mu(C_j) = \sum_j \mu\qty(\bigcup_k (C_j \cap D_k)) = \sum_j \sum_k \mu(C_j \cap D_k) = \sum_k \mu(D_k) = \mu\qty(\bigcup_k D_k) \]
	To prove the existence of \( \mu \) on \( \mathcal B \), we apply Carath\'eodory's extension theorem, and therefore must check that \( \mu \) is countably additive on \( \mathcal A \).
	Equivalently, by a question on an example sheet, it suffices to show that for all sequences \( A_n \in \mathcal A \) such that \( A_n \) decreases to \( \varnothing \), we have \( \mu(A_n) \to 0 \).
	Suppose this is not the case, so there exist \( \varepsilon > 0 \) and \( B_n \in \mathcal A \) such that \( B_n \) decreases to \( \varnothing \) but \( \mu(B_n) \geq 2\varepsilon \) for infinitely many \( n \) (and so without loss of generality for all \( n \)).
	We can approximate \( B_n \) from within by a sequence \( C_n \).
	Suppose \( B_n = \bigcup_{i=1}^{N_n} (a_{ni},b_{ni}] \), then define \( C_n = \bigcup_{i=1}^{N_n} (a_{ni}+\frac{2^{-n}\varepsilon}{N_n}, b_{ni}] \).
	Note that the \( C_n \) lie in \( \mathcal A \), and \( \mu(B_n \setminus C_n) \leq 2^{-n}\varepsilon \).
	Since \( B_n \) is decreasing, we have \( B_N = \bigcap_{n \leq N} B_n \), and
	\[ B_N \setminus (C_1 \cap \dots \cap C_N) = B_N \cap \qty(\bigcup_{n \leq N} C_n^c) = \bigcup_{n \leq N} B_N \setminus C_n \subseteq \bigcup_{n \leq N} B_n \setminus C_n \]
	Since \( \mu \) is increasing,
	\[ \mu(B_N \setminus (C_1 \cap \dots \cap C_N)) \leq \mu\qty(\bigcup_{n \leq N} B_n \setminus C_n) \leq \sum_{n \leq N} \mu(B_n \setminus C_n) \leq \sum_{n \leq N} 2^{-n}\varepsilon \leq \varepsilon \]
	Since in addition \( \mu(B_N) \geq 2\varepsilon \), additivity implies that \( \mu(C_1 \cap \dots \cap C_N) \geq \varepsilon \).
	This means that \( C_1 \cap \dots \cap C_N \) cannot be empty.
	We can add the left endpoints of the intervals, giving \( K_N = \overline C_1 \cap \dots \cap \overline C_N \).
	By Analysis I, \( K_N \) is a nested sequence of nonempty closed bounded sets (each a finite union of closed intervals) and therefore there is a point \( x \in \mathbb R \) such that \( x \in K_N \) for all \( N \).
	But \( K_N \subseteq \overline C_N \subseteq B_N \), so \( x \in \bigcap_N B_N \), which is a contradiction since \( \bigcap_N B_N \) is empty.
	Therefore, a measure \( \mu \) on \( \mathcal B \) exists.

	Now we prove uniqueness.
	Suppose \( \mu, \lambda \) are measures such that the measure of an interval \( (a,b] \) is \( b - a \).
	We define new measures \( \mu_n(A) = \mu(A \cap (n,n+1]) \) and \( \lambda_n(A) = \lambda(A \cap (n,n+1]) \).
	These new measures are finite with total mass 1.
	Hence, we can use the uniqueness of extension theorem to show \( \mu_n = \lambda_n \) on \( \mathcal B \).
	We find
	\[ \mu(A) = \mu\left(\bigcup_n A \cap (n,n+1]\right) = \sum_{n \in \mathbb Z} \mu(A \cap (n,n+1]) = \sum_{n \in \mathbb Z} \mu_n(A) = \sum_{n \in \mathbb Z} \lambda_n(A) = \dots = \lambda(A) \]
\end{proof}
\begin{definition}
	A Borel set \( B \in \mathcal B \) is called a \emph{Lebesgue null set} if \( \mu(B) = 0 \).
\end{definition}
\begin{remark}
	A singleton \( \qty{x} \) can be written as \( \bigcap_n \left(x-\frac 1n, x\right] \), hence \( \mu(\qty{x}) = \lim_n \frac 1n = 0 \).
	Hence singletons are null sets.
	In particular, \( \mu((a,b)) = \mu((a,b]) = \mu([a,b)) = \mu([a,b]) \).
	Any countable set \( Q = \bigcup_q \qty{q} \) is a null set.
	Not all null sets are countable; the Cantor set is an example.

	The Lebesgue measure is \emph{translation-invariant}.
	Let \( x \in \mathbb R \), then the set \( B + x = \qty{b + x \mid b \in B} \) lies in \( \mathcal B \) if and only if \( B \in \mathcal B \), and in this case, it satisfies \( \mu(B + x) = \mu(B) \).
	We can define the translated Lebesgue measure \( \mu_x(B) = \mu(B + x) \) for all \( B \in \mathcal B \), but since the Lebesgue measure is unique, \( \mu_x = \mu \).

	The class of outer measurable sets \( \mathcal M \) used in Carath\'eodory's extension theorem is here called the class of Lebesgue measurable sets.
	This class can be shown to be
	\[ \mathcal M = \qty{ M = A \cup N, A \in \mathcal B, N \subseteq B, B \in \mathcal B, \mu(B) = 0 } \supsetneq \mathcal B \]
\end{remark}

\subsection{Existence of non-measurable sets}
Assuming the axiom of choice, there exists a non-measurable set of reals.
Consider \( E = (0,1] \) with addition defined modulo one.
By the same argument as before, the Lebesgue measure is translation-invariant modulo one.
Consider the subgroup \( Q = E \cap \mathbb Q \) of \( (E, +) \).
We define \( x \sim y \) if \( x - y \in Q \).
Then, this gives equivalence classes \( [x] = \qty{y \in E \colon x \sim y} \) for all \( x \in E \).
Assuming the axiom of choice, we can select a representative of \( [x] \) for each \( x \in E \), and denote by \( S \) the set of such representatives.
We can partition \( E \) into the union of its cosets, so \( E = \bigcup_{q \in Q} (S + q) \) is a disjoint union.

Suppose \( S \) is a Borel set.
Then \( S + q \) is also a Borel set.
We can therefore write
\[ 1 = \mu(E) = \mu\qty(\bigcup_{q \in Q}(S+q)) = \sum_{q \in Q} \mu(S+q) = \sum_{q \in Q} \mu(S) \]
But no value for \( \mu(S) \in [0,\infty] \) can be assigned to make this equation hold.
Therefore \( S \) is not a Borel set.

One can further show that \( \mu \) cannot be extended to all subsets \( \mathcal P(E) \).
\begin{theorem}[Banach, Kuratowski]
	Assuming the continuum hypothesis, there exists no measure \( \mu \) on the set \( \mathcal P((0,1]) \) such that \( \mu((0,1]) = 1 \) and \( \mu(\qty{x}) = 0 \) for \( x \in (0,1] \).
\end{theorem}

\subsection{Probability spaces}
\begin{definition}
	If a measure space \( (E, \mathcal E, \mu) \) has \( \mu(E) = 1 \), we call it a \emph{probability space}, and instead write \( (\Omega, \mathcal F, \mathbb P) \).
	We call \( \Omega \) the outcome space or sample space, \( \mathcal F \) the set of events, and \( \mathbb P \) the probability measure.
\end{definition}
The axioms of probability theory (Kolmogorov, 1933), are
\begin{enumerate}
	\item \( \prob{\Omega} = 1 \);
	\item \( 0 \leq \prob{E} \leq 1 \) for all \( E \in \mathcal F \);
	\item if \( A_n \) are a disjoint sequence of events in \( \mathcal F \), then \( \prob{\bigcup_n A_n} = \sum_n \prob{A_n} \).
\end{enumerate}
This is exactly what is required by our definition: \( \mathbb P \) is a measure on a \( \sigma \)-algebra.
\begin{definition}
	Events \( A_i, i \in I \) are \emph{independent} if for all finite \( J \subseteq I \), we have
	\[ \prob{\bigcap_{j \in J} A_j} = \prod_{j \in J} \prob{A_j} \]
	\( \sigma \)-algebras \( \mathcal A_i, i \in I \) are independent if for every finite \( J \subseteq I \) and every choice of \( A_j \in \mathcal A_j \) for \( j \in J \), the events \( A_j, j \in J \) are independent.
\end{definition}
Kolmogorov showed that these definitions are sufficient to derive the law of large numbers.
\begin{proposition}
	Let \( \mathcal A_1, \mathcal A_2 \) be \( \pi \)-systems of sets in \( \mathcal F \).
	Suppose \( \prob{A_1 \cap A_2} = \prob{A_1} \prob{A_2} \) for all \( A_1 \in \mathcal A_1, A_2 \in \mathcal A_2 \).
	Then the \( \sigma \)-algebras \( \sigma(\mathcal A_1), \sigma(\mathcal A_2) \) are independent.
\end{proposition}
This follows by uniqueness.

\subsection{Borel--Cantelli lemmas}
\begin{definition}
	Let \( A_n \in \mathcal F \) be a sequence of events.
	Then the \emph{limit superior} of \( A_n \) is
	\[ \limsup_n A_n = \bigcap_n \bigcup_{m \geq n} A_m = \qty{A_n \text{ infinitely often}} \]
	The \emph{limit inferior} of \( A_n \) is
	\[ \liminf_n A_n = \bigcup_n \bigcap_{m \geq n} A_m = \qty{A_n \text{ eventually}} \]
\end{definition}
\begin{lemma}[First Borel--Cantelli lemma]
	Let \( A_n \in \mathcal F \) be a sequence of events such that \( \sum_n \prob{A_n} < \infty \).
	Then \( \prob{A_n \text{ infinitely often}} = 0 \).
\end{lemma}
\begin{proof}
	For all \( n \), we have
	\[ \prob{\limsup_k A_k} = \prob{\bigcap_k \bigcup_{m \geq k} A_m} \leq \prob{\bigcup_{m \geq n} A_m} \leq \sum_{m \geq n} \prob{A_m} \to 0 \text{ as } n \to \infty \]
\end{proof}
This proof did not require that \( \mathbb P \) be a probability measure, just that it is a measure.
Therefore, we can use this for arbitrary measures.
\begin{lemma}[Second Borel--Cantelli lemma]
	Let \( A_n \in \mathcal F \) be a sequence of independent events, and \( \sum_n \prob{A_n} = \infty \).
	Then \( \prob{A_n \text{ infinitely often}} = 1 \).
\end{lemma}
\begin{proof}
	By independence, for all \( N \geq n \in \mathbb N \) and using \( 1 - a \leq e^{-a} \), we find
	\[ \prob{\bigcap_{m=n}^N A_m^c} = \prod_{m=n}^N \qty(1 - \prob{A_m}) \leq \prod_{m=n}^N e^{-\prob{A_m}} = e^{-\sum_{m=n}^N \prob{A_m}} \]
	As \( N \to \infty \), this approaches zero.
	Since \( \bigcap_{m=n}^N A_m^c \) decreases to \( \bigcap_{m=n}^\infty A_m^c \), by countable additivity we must have \( \prob{\bigcap_{m=n}^\infty A_m^c} = 0 \).
	But then
	\[ \prob{A_n \text{ infinitely often}} = \prob{\bigcap_n \bigcup_{m \geq n} A_m} = 1 - \prob{\bigcup_n \bigcap_{m \geq n} A_m^c} \geq 1 - \sum_n \prob{\bigcap_{m \geq n} A_m^c} = 1 \]
	Hence this probability is equal to one.
\end{proof}
